On cherche à étudier l’effet de trois facteurs sur le transcriptome des racines d’Arabidopsis thaliana. Le CO2, au cours des études préliminaires, s’est montré peu influent en conditions contrôles de fer et de nitrates, et accentué en cas de stress nutritionnel. Nous reprennons ces résultats avec des fonctions génériques et propres pour en faire le résumé et de jolis graphes.
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## EOF within quoted string
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## number of items read is not a multiple of the number of columns
On a, pour chaque gène et chaque condition, son niveau d’expression en sortie de quantification. On labelle les conditions avec le code suivant : lettre majuscule pour le niveau fort, minuscule pour le niveau faible. Le réplicat est donné après l’underscore.
# quantification file
data <- read.csv("quantifFiles/QuantifGenesTomate.csv", h = T, sep = ",")
rownames(data) <- data$Gene
genes = which(!(grepl("__", rownames(data))))
not_quant = data[which((grepl("__", rownames(data)))), ]
data = data[genes, grepl("R", colnames(data))]
getLabel("R6")[1] "cnF_3"
[1] At_AmbientCO2_LowNitrate_Fe1
48 Levels: At_AmbientCO2_HighNitrate_Fe1 ... Sl_ElevatedCO2_LowNitrate_FeStarvation3
[1] "At_AmbientCO2_LowNitrate_Fe"
keep <- rowSums(data) >= 10
data <- data[keep, ]
group <- sapply(colnames(data), getLabel, with.rep = F)
colnames(data) <- sapply(colnames(data), getLabel)
head(data) Cnf_3 cNf_3 cNf_2 cnF_1 cnF_2 cnF_3 cNF_1 CNF_1 cNF_3 CNF_3 CNF_2
Sly00g0382751 400 320 259 280 464 370 228 321 282 348 261
Sly00g0382761 1326 755 910 1026 1429 1303 665 1030 771 1111 943
Sly00g0382831 0 1 0 0 1 0 0 0 1 4 0
cNF_2 CnF_3 cNf_1 CnF_2 CnF_1 Cnf_2 Cnf_1 CNf_2 CNf_3 cnf_2 cnf_1
Sly00g0382751 302 309 306 264 238 312 269 300 223 552 342
Sly00g0382761 882 1467 912 987 1017 1063 1146 825 791 1392 1000
Sly00g0382831 0 0 0 0 0 0 0 0 1 2 0
cnf_3 CNf_1
Sly00g0382751 401 216
Sly00g0382761 1049 687
Sly00g0382831 0 3
[ reached 'max' / getOption("max.print") -- omitted 3 rows ]
[1] 26984 24
On définit les conditions contrôle comme suit : CO2 ambiant et fort fer.
g = list()
method = "edger"
labels <- c("cNF", "cnF")
genes1 <- dualDE(data, labels, pval = 0.01, method = method) (Intercept) groupcnF
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNF_1" "cNF_3" "cNF_2" "cnF_1" "cnF_2" "cnF_3"
cNF_1 cNF_3 cNF_2 cnF_1 cnF_2 cnF_3
0.9303605 0.9965831 1.0144547 1.0136912 1.0475295 0.9973810
[1] "4433 genes DE"
g[[paste(labels[1], labels[2])]] = as.vector(genes1)
a <- OntologyProfile(genes1$gene_id, specie = specie)[1] "3003not in the Micro-Tom annotation..."
(Intercept) groupCnF
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CNF_1" "CNF_3" "CNF_2" "CnF_3" "CnF_2" "CnF_1"
CNF_1 CNF_3 CNF_2 CnF_3 CnF_2 CnF_1
0.9629319 0.9647597 0.9645826 1.0326976 1.0393421 1.0356862
[1] "8691 genes DE"
[1] "3003not in the Micro-Tom annotation..."
(Intercept) groupcnf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNf_3" "cNf_2" "cNf_1" "cnf_2" "cnf_1" "cnf_3"
cNf_3 cNf_2 cNf_1 cnf_2 cnf_1 cnf_3
1.0335785 0.9944357 1.0134083 0.9769374 1.0075792 0.9740609
[1] "4743 genes DE"
[1] "3003not in the Micro-Tom annotation..."
(Intercept) groupCnf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CNf_2" "CNf_3" "CNf_1" "Cnf_3" "Cnf_2" "Cnf_1"
CNf_2 CNf_3 CNf_1 Cnf_3 Cnf_2 Cnf_1
0.9743556 0.9973626 0.9477775 1.0807406 1.0032137 0.9965499
[1] "6628 genes DE"
[1] "3003not in the Micro-Tom annotation..."
On visualise les gènes différentiellement exprimés en commun entre les différents niveaux des autres facteurs.
library(ggVennDiagram)
library(VennDiagram)
gene_list <- list()
for (comp in names(g)) {
print(g[[comp]])
gene_list[[comp]] <- g[[comp]]$gene_id
} gene_id a.value m.value p.value q.value rank
1 Sly10g0188051 12.598582 -4.400584 1.609545e-175 4.343196e-171 1
2 Sly03g0217781 6.584976 8.905286 4.683400e-163 6.318844e-159 2
3 Sly02g0332441 7.857178 -5.902748 1.202441e-135 1.081555e-131 3
4 Sly03g0211641 9.605543 -6.053109 9.062039e-135 6.113251e-131 4
5 Sly08g0272441 8.122122 5.750390 3.240823e-122 1.749007e-118 5
6 Sly08g0271991 8.396313 5.090614 2.400292e-117 1.079492e-113 6
7 Sly06g0378751 6.543756 -6.646114 5.527006e-113 2.130582e-109 7
8 Sly01g0048331 10.075922 4.631398 1.595590e-108 5.381927e-105 8
9 Sly05g0138531 7.447777 -5.741914 2.183699e-99 6.547215e-96 9
estimatedDEG upreg
1 1 0
2 1 1
3 1 0
4 1 0
5 1 1
6 1 1
7 1 0
8 1 1
9 1 0
[ reached 'max' / getOption("max.print") -- omitted 4424 rows ]
gene_id a.value m.value p.value q.value rank
1 Sly02g0332441 7.385647 -8.203478 5.283095e-309 1.425590e-304 1
2 Sly08g0272441 7.838470 7.455207 1.035144e-307 1.396616e-303 2
3 Sly02g0328821 7.879031 -8.217107 2.492270e-283 2.241714e-279 3
4 Sly08g0277811 11.514679 5.675296 7.572690e-257 5.108536e-253 4
5 Sly03g0217781 6.471761 8.424294 1.089274e-245 5.878592e-242 5
6 Sly10g0182661 9.176833 6.614196 7.474080e-237 3.361343e-233 6
7 Sly05g0138531 7.126372 -7.466013 2.012913e-235 7.759493e-232 7
8 Sly03g0211641 10.268741 -7.599446 2.813881e-234 9.491220e-231 8
9 Sly05g0156491 8.299404 -5.700299 4.248959e-230 1.273932e-226 9
estimatedDEG upreg
1 1 0
2 1 1
3 1 0
4 1 1
5 1 1
6 1 1
7 1 0
8 1 0
9 1 0
[ reached 'max' / getOption("max.print") -- omitted 8682 rows ]
gene_id a.value m.value p.value q.value rank
1 Sly01g0048331 9.614645 6.157454 1.085182e-150 2.928256e-146 1
2 Sly03g0217781 7.327969 7.254249 2.790310e-149 3.764687e-145 2
3 Sly08g0272441 7.579327 7.580214 4.690744e-137 4.219168e-133 3
4 Sly02g0328821 6.804451 -10.094779 2.707468e-102 1.826458e-98 4
5 Sly09g0105051 3.834089 10.751329 4.282543e-98 2.311203e-94 5
6 Sly08g0277811 10.847881 4.102475 2.601345e-94 1.169912e-90 6
7 Sly03g0206861 4.455075 9.993302 3.853732e-90 1.485559e-86 7
8 Sly12g0049231 10.524061 4.162681 4.079700e-86 1.376083e-82 8
9 Sly10g0183051 8.799731 3.772482 4.444988e-83 1.332706e-79 9
estimatedDEG upreg
1 1 1
2 1 1
3 1 1
4 1 0
5 1 1
6 1 1
7 1 1
8 1 1
9 1 1
[ reached 'max' / getOption("max.print") -- omitted 4734 rows ]
gene_id a.value m.value p.value q.value rank
1 Sly12g0050491 6.155583 -11.075034 1.051056e-248 2.836169e-244 1
2 Sly07g0120541 5.609891 -11.554106 1.098937e-237 1.482685e-233 2
3 Sly12g0050501 5.180435 -11.865119 6.853739e-234 6.164710e-230 3
4 Sly02g0328821 8.516604 -9.930439 7.947584e-207 5.361440e-203 4
5 Sly09g0104401 5.477974 -10.449643 3.362879e-197 1.814879e-193 5
6 Sly12g0050511 4.884061 -11.056410 2.115714e-189 9.515073e-186 6
7 Sly12g0050521 7.019000 -8.208199 1.416565e-186 5.460655e-183 7
8 Sly02g0334031 7.498396 -6.784197 1.932987e-174 6.519965e-171 8
9 Sly06g0374881 6.711467 -8.920837 8.286132e-174 2.484366e-170 9
estimatedDEG upreg
1 1 0
2 1 0
3 1 0
4 1 0
5 1 0
6 1 0
7 1 0
8 1 0
9 1 0
[ reached 'max' / getOption("max.print") -- omitted 6619 rows ]
d <- data.frame(matrix(ncol = 4, nrow = 2))
colnames(d) <- names(g)
row.names(d) <- c("up", "down")
for (comp in names(g)) {
d["up", comp] <- sum(g[[comp]]$upreg == 1)
d["down", comp] <- sum(g[[comp]]$upreg == 0)
}
res <- melt(d)
res$reg = rep(c("up", "down"), 4)
genesNitrateAt <- res
# save(genesNitrateAt, file = 'genesNitrateAt.RData')
ggplot(res, aes(fill = reg, y = value, x = variable)) + geom_bar(position = "stack",
stat = "identity", alpha = 0.5, color = "black") + ggtitle("Nitrate effet on gene regulation") +
xlab("") + ylab("Number of differentially expressed genes") + coord_flip()common_genes <- unlist(partitions[1, "..values.."])
results <- getBM(filters = "ensembl_gene_id", attributes = c("ensembl_gene_id", "description",
"external_gene_name", "entrezgene_id"), values = common_genes, mart = mart)
results <- results[!rownames(results) %in% which(duplicated(results$ensembl_gene_id)),
]
kable(results)| ensembl_gene_id | description | external_gene_name | entrezgene_id |
|---|
sharedBy3 <- unique(unlist(subset(partitions, partitions$shared == 3)$..values..))
a <- OntologyProfile(sharedBy3, specie = specie)[1] "3003not in the Micro-Tom annotation..."